********************************************************************************
* Data management: Building the dataset for the concurrent elections project *
*******************************************************************************

* Session setup: start from an empty session, switch off output paging and
* close any log file that is still open (capture ignores the error if none is).
cls
clear all
clear matrix
set more off
capture log close

* Write a plain-text log of this run; an existing log file is overwritten.
log using "C:/Userdata/Shared/Logs/Concurrent elections/datamanagement.text", replace text

* Relative do-file paths (the two do* globals below) are resolved from here.
cd "C:/Userdata/Shared/Dofiles/"

global dodatadf "DoData/Concurrent elections"									// Data-management do-files (relative to the cd above)
global doanalysisdf "DoAnalysis/Concurrent elections"							// Analysis do-files (relative to the cd above)
global usingdata "E:/ProjData/Concurrent elections"								// Working data directory: inputs, temp files and final data
global rawdata "D:/SCB_ConPol/Stata"											// Raw data directory

do "$dodatadf/programs.do"														// Runs the separate program do-file; presumably defines ifaulabel, which is called further down - confirm
*

********************************************************************************
* Define the universe of individuals *******************************************
********************************************************************************


* Build the base sample from two register files. The 1994 cohort is taken from
* the 1998 file (citizenship dates for 1994 are found there), the 2010 cohort
* from the 2010 file.
foreach k in 1998 2010 {
	* Citizenship year to keep from this register file
	local ctzyear = cond(`k' == 2010, `k', 1994)

	use "$usingdata/RTB_`k'", clear

	* Split the citizenship date (read as YYYYMMDD) into year, month and day
	tostring MedBdat, replace
	gen year_ctz  = substr(MedBdat, 1, 4)
	gen month_ctz = substr(MedBdat, 5, 2)
	gen day_ctz   = substr(MedBdat, 7, 2)
	destring month_ctz year_ctz day_ctz, replace

	* Only Swedish citizens have the right to vote in parliamentary elections.
	* The RD analysis only uses those who became citizens in the election year.
	keep if trim(Medblandnamn) == "Sverige"
	keep if year_ctz == `ctzyear'

	* Stop the run if any LopNr occurs more than once
	duplicates tag LopNr, gen(ndup)
	assert ndup == 0
	drop ndup

	save "$usingdata/temp`k'", replace
}

* Stack the 1994 and 2010 cohorts into one dataset
use "$usingdata/temp1998"
append using "$usingdata/temp2010"


********************************************************************************
* Adding covariates ************************************************************
********************************************************************************

* Birth data: add birth information; sample members without a match are kept
merge 1:1 LopNr using "$usingdata/Fodelseuppg.dta", keep(master match) nogenerate

* Age in the citizenship year, from the birth year/month variable (read as YYYYMM)
tostring FodArMan, replace
gen year_born  = substr(FodArMan, 1, 4)
gen month_born = substr(FodArMan, 5, 2)
destring year_born month_born, replace

gen age = year_ctz - year_born

* Drop individuals who are not 18 by election day: those under 18, and those
* who turn 18 in September-December (election day is in September).
drop if age < 18 | (age == 18 & month_born > 8)


* Country of origin (grouped)
merge 1:1 LopNr using "$usingdata/FodelselandIFAUGrupp", keep(master match) nogenerate
ifaulabel																		// Program: see the program do-file

* Drop Nordic and Swedish born: Nordic citizens have other rules for voting in
* elections and are hence dropped.
drop if inlist(IFAUkod, 0, 26, 27, 28)

* LISA data for the two election years
merge 1:1 LopNr using "$usingdata/LISA_1994", keep(master match) nogenerate
merge 1:1 LopNr using "$usingdata/LISA_2010", keep(master match) nogenerate

* Labor market data for the year before each election (LISA 2009 and 1993)
local lisa09 LoneInk_09 SocBidrFam_09 SocBidrPersF04_09 SyssStatJ_09
merge 1:1 LopNr using "$usingdata/LISA_2009", keep(master match) nogenerate keepusing(`lisa09')

local lisa93 LoneInk_93 SocBidrFam_93 SocBidrPersF_93 SyssStat_93
merge 1:1 LopNr using "$usingdata/LISA_1993", keep(master match) nogenerate keepusing(`lisa93')

* Migration data: years between the migration year and the citizenship year
merge 1:1 LopNr using "$usingdata/Migrationer.dta", keep(master match) nogenerate

tostring Datum, replace
gen year_im = substr(Datum, 1, 4)
destring year_im, replace
gen time_in_country = year_ctz - year_im
* Note: three people have a negative time in country

********************************************************************************
* Adding outcome variables *****************************************************
********************************************************************************

* Voter turnout files for 2010 and 1994 (in that order); unmatched sample
* members are kept
foreach yr in 2010 1994 {
	merge 1:1 LopNr using "$usingdata/Valdelt_`yr'", keep(master match) nogenerate
}

* Right to vote in national elections: the smaller of the two year-specific
* codes (min ignores a missing argument), turned into a 0/1 dummy that is
* missing only when both codes are missing
gen Rostratt = min(Rostratt_94, Rostratt_10)
gen DRostratt = (Rostratt == 1) if !missing(Rostratt)
label variable DRostratt "Right. vote, national"

save "$usingdata/temp1.dta", replace


********************************************************************************
* Generate running variables for RD-analyses************************************
********************************************************************************

* Citizenship date as one Stata date. mdy() returns missing when any part is
* missing or the date is invalid, so edate and the two running variables
* below can be missing.
gen edate = mdy(month_ctz, day_ctz, year_ctz)

* Treatment: included in the election roll for the national election.
* Running variable = days between the citizenship date and the cut-off date
* (positive = citizen before the cut-off).
* NOTE(review): 19 Aug 2010 is 31 days before 19 Sep 2010, although the cut-off
* is described as 30 days before the election - confirm the intended date.
gen days_treat_enrolled = .
replace days_treat_enrolled = mdy(8,19,2010) - edate if year_ctz == 2010 		// Described as 30 days before the election
replace days_treat_enrolled = mdy(7,1,1994) - edate if year_ctz == 1994			// 79 days before the election

* Treatment: being a citizen at the time of the general election
gen days_treat_citizen = .
replace days_treat_citizen = mdy(9,19,2010) - edate if year_ctz == 2010			// Election day, September 19
replace days_treat_citizen = mdy(9,18,1994) - edate if year_ctz == 1994			// Election day, September 18

* Treatment groups. Stata treats missing as larger than any number, so every
* ">= 0" test is paired with a non-missing check; otherwise people without a
* valid citizenship date would be classified as enrolled (group 3).
gen Tgroup = .
replace Tgroup = 1 if days_treat_citizen < 0									// After the election. Not citizen, not able to vote
replace Tgroup = 2 if days_treat_citizen >= 0 & !missing(days_treat_citizen) & days_treat_enrolled < 0	// Middle group. Citizens without the right to vote.
replace Tgroup = 3 if days_treat_enrolled >= 0 & !missing(days_treat_enrolled)	// Citizens with the right to vote.



********************************************************************************
* Further data management for covariates ***************************************
********************************************************************************

* Remove covariates that belong to the other cohort's years: the 1994 cohort
* loses all *_09 and *_10 variables, the 2010 cohort all *_93 and *_94
* variables. String variables are set to "", numeric ones to missing.
local other1994 "*_09 *_10"
local other2010 "*_93 *_94"

foreach yr in 1994 2010 {
	ds `other`yr''
	foreach v of varlist `r(varlist)' {
		* _rc is 0 when the variable is a string variable
		capture confirm string variable `v'
		if _rc == 0 {
			replace `v' = "" if year_ctz == `yr'
		}
		else {
			replace `v' = . if year_ctz == `yr'
		}
	}
}

* Gender: recode so that the variable is 0/1 (original code 2 becomes 0)
rename Kon sex
replace sex = 0 if sex == 2

* Having children: 1 if any of the child-count variables is positive, 0 if all
* counts are observed and none is positive. Stata treats missing as larger
* than any number, so each "> 0" test is paired with a non-missing check;
* otherwise people without child counts would be coded as parents. When a
* count is missing and no other count is positive, the dummy is left missing.
foreach k in 94 10 {
	gen parent_`k' = 0 if !missing(Barn0_3_`k', Barn4_6_`k', Barn7_10_`k', Barn11_15_`k', Barn16_17_`k')
	foreach v of varlist Barn0_3_`k' Barn4_6_`k' Barn7_10_`k' Barn11_15_`k' Barn16_17_`k' {
		replace parent_`k' = 1 if `v' > 0 & !missing(`v')
	}
}
replace parent_10 = . if year_ctz == 1994
replace parent_94 = . if year_ctz == 2010

* One parent variable: each person takes the value for the own cohort year
gen parent = .
replace parent = parent_10 if parent_94 == .
replace parent = parent_94 if parent_10 == .
drop parent_*

* Number of children: sum of the child counts (missing if any count is missing)
foreach k in 94 10 {
	gen children_`k' = Barn0_3_`k' + Barn4_6_`k' + Barn7_10_`k' + Barn11_15_`k' + Barn16_17_`k'
}
replace children_10 = . if year_ctz == 1994
replace children_94 = . if year_ctz == 2010

gen children = .
replace children = children_10 if children_94 == .
replace children = children_94 if children_10 == .
drop children_*


* Years of education from the education level code, according to the
* convention at IFAU. Codes other than those listed give a missing value.
foreach k in 94 10 {
	local lvl Sun2000niva_old_`k'
	gen double yearseducation_`k' = ///
		cond(`lvl' == "*", 6.6,  ///
		cond(`lvl' == "1", 7.5,  ///
		cond(`lvl' == "2", 9.4,  ///
		cond(`lvl' == "3", 11.2, ///
		cond(`lvl' == "4", 12.4, ///
		cond(`lvl' == "5", 14.2, ///
		cond(`lvl' == "6", 17,   ///
		cond(`lvl' == "7", 20.4, .))))))))
}
replace yearseducation_10 = . if year_ctz == 1994
replace yearseducation_94 = . if year_ctz == 2010

* One education variable: take whichever year-specific value is present.
* NOTE(review): the combined variable is stored as float although the
* year-specific ones are double, so exact comparisons such as == 9.4 will
* not match - confirm this is acceptable downstream.
gen yearseducation = cond(yearseducation_94 == ., yearseducation_10, cond(yearseducation_10 == ., yearseducation_94, .))
drop yearseducation_*


* Dummy for living in a big city (Stockholm, Göteborg or Malmö)
gen bigcity = inlist(Kommun, 1280, 1480, 180)

* Employed: 1 if the employment status code is 1, 0 if it is 5 or 6, missing
* otherwise. The status variable is SyssStatJ_* for 2009-2010 and SyssStat_*
* for 1993-1994.
foreach k in 09 10 93 94 {
	local src SyssStat_`k'
	if inlist("`k'", "09", "10") local src SyssStatJ_`k'

	gen sysselsatt_`k' = 1 if `src' == 1
	replace sysselsatt_`k' = 0 if inlist(`src', 5, 6)
}

* Social welfare: 1 if the benefit amount is positive, 0 if it is zero,
* missing otherwise. The amount is SocBidrPersF04_* for 2009-2010 and
* SocBidrPersF_* for 1993-1994.
foreach k in 09 10 93 94 {
	local src SocBidrPersF_`k'
	if inlist("`k'", "09", "10") local src SocBidrPersF04_`k'

	gen welfare_`k' = 0 if `src' == 0
	replace welfare_`k' = 1 if `src' > 0 & `src' != .
}

* Joint pre-treatment variables: the value from the year before the election
* (2009 for the 2010 cohort, 1993 for the 1994 cohort)
gen pre_emp  = cond(year_ctz == 2010, sysselsatt_09, cond(year_ctz == 1994, sysselsatt_93, .))
gen pre_ink  = cond(year_ctz == 2010, LoneInk_09,    cond(year_ctz == 1994, LoneInk_93, .))
gen pre_welf = cond(year_ctz == 2010, welfare_09,    cond(year_ctz == 1994, welfare_93, .))

* Joint welfare variable for the election year itself
gen welf     = cond(year_ctz == 2010, welfare_10,    cond(year_ctz == 1994, welfare_94, .))


* Residualize labor market and education variables on the cohort year to
* compensate for pure year effects: inflation (income), the economic crisis in
* the 1990's (employment) and overall higher education in the later year.
foreach v in pre_ink pre_emp pre_welf {
	reg `v' year_ctz
	predict ehat_`v', residuals
}

reg yearseducation year_ctz
predict ehat_yeduc, residuals

* Unemployment insurance (akassa): 1 if the amount is positive, 0 if zero,
* using the 2010 variable first and then the 1994 variable
gen unmploin = .
foreach k in 10 94 {
	replace unmploin = 1 if Akassa_`k' > 0 & Akassa_`k' != .
	replace unmploin = 0 if Akassa_`k' == 0
}


********************************************************************************
* Further datamanagement outcome variables *************************************
********************************************************************************

* Outcome variables from the turnout codes r_* (parliament), k_* (municipal)
* and l_* (county).
* In Stata & binds tighter than |, so every list of codes is wrapped in
* parentheses (or inlist) before the cohort-year restriction is applied.
* Missing counts as larger than any number, so "r_94 > 3" is paired with a
* non-missing check; otherwise people without a 1994 code would be coded as
* eligible.

* Eligible to vote: code 3 = not eligible; codes 1, 2 and above 3 = eligible
gen eligeble = .
replace eligeble = 0 if r_94 == 3 & year_ctz == 1994
replace eligeble = 1 if (r_94 == 1 | r_94 == 2 | (r_94 > 3 & !missing(r_94))) & year_ctz == 1994
replace eligeble = 0 if r_10 == 3 & year_ctz == 2010
replace eligeble = 1 if inlist(r_10, 1, 2, 4, 5, 6) & year_ctz == 2010


* Voted parliament: codes 2, 4, 5, 6 = voted; code 1 = did not vote
gen voted_r = .
replace voted_r = 1 if inlist(r_94, 2, 4, 5, 6) & year_ctz == 1994
replace voted_r = 0 if r_94 == 1 & year_ctz == 1994
replace voted_r = 1 if inlist(r_10, 2, 4, 5, 6) & year_ctz == 2010
replace voted_r = 0 if r_10 == 1 & year_ctz == 2010


* Voted municipal and county: 1 if voted in either, 0 if voted in neither
gen voted_local = .
replace voted_local = 1 if (inlist(k_94, 2, 4, 5, 6) | inlist(l_94, 2, 4, 5, 6)) & year_ctz == 1994
replace voted_local = 0 if k_94 == 1 & l_94 == 1 & year_ctz == 1994
replace voted_local = 1 if (inlist(k_10, 2, 4, 5, 6) | inlist(l_10, 2, 4, 5, 6)) & year_ctz == 2010
replace voted_local = 0 if k_10 == 1 & l_10 == 1 & year_ctz == 2010

drop l_* k_* r_*

drop if voted_local == .														// We only study those for which we have information on voter turnout.


* Spill-over variable: local turnout among other immigrants in the same
* parish, from the same country group, who became citizens in the same year.
* The group key is left empty when any of its three parts is missing.
gen forsimmig = string(Forsamling) + "_" + string(IFAUkod) + "_" + string(year_ctz) ///
	if Forsamling != . & IFAUkod != . & year_ctz != .

* Those enrolled to vote in the national election (Tgroup 3) are excluded from
* the mean, so it is the local turnout of those not eligible to vote in the
* national election.
gen voted_local_temp = voted_local if Tgroup != 3
bysort forsimmig: egen turnimmig_local_turnout = mean(voted_local_temp)

* No spill-over value when the group key could not be built
replace turnimmig_local_turnout = . if Forsamling == . | IFAUkod == . | year_ctz == .
drop voted_local_temp
*
save "$usingdata/temp2.dta", replace
*

********************************************************************************
** Adding voter turnout data for spouses that became citizens the same year but after election (spill over variable) **
********************************************************************************


* Spouse file for 2010: one row per person (LopNr) with the spouse's local
* turnout, for spouses who became Swedish citizens in 2010 but after the
* election.
use "$usingdata/LISA_2010", clear

* Keep persons with a family-status code below 200, excluding code 0
* (treated as spouses in the original code)
keep if FamStF < 200 & FamStF != 0
keep LopNr FamId_10

* Keep families with exactly two such persons
duplicates tag FamId_10, gen(dup)
keep if dup == 1

* Attach the other family member's LopNr (an alternative to reshape, which
* cannot handle this many variables). Stored as double: the default float
* type cannot hold integers above 16,777,216 exactly and would corrupt large
* IDs before the merges below.
bysort FamId_10 (LopNr): gen double LopNr_spouse = LopNr[_n+1]
bysort FamId_10 (LopNr): replace LopNr_spouse = LopNr[_n-1] if LopNr_spouse == .
keep LopNr LopNr_spouse

* Temporarily let LopNr be the spouse's ID so the spouse's data can be merged
rename LopNr LopNr_i
rename LopNr_spouse LopNr

* Spouse's local turnout: 1 if voted in the municipal or county election,
* 0 if voted in neither
merge 1:1 LopNr using "$usingdata/Valdelt_2010", nogen keep(1 3)
gen voted_local_spouse_10 = .
replace voted_local_spouse_10 = 1 if inlist(k_10, 2, 4, 5, 6) | inlist(l_10, 2, 4, 5, 6)
replace voted_local_spouse_10 = 0 if k_10 == 1 & l_10 == 1

merge 1:1 LopNr using "$usingdata/Fodelseuppg", nogen keep(1 3) keepusing(LopNr F_delselandGrupp)
rename F_delselandGrupp F_delselandGrupp_spouse_10
merge 1:1 LopNr using "$usingdata/RTB_2010", nogen keep(1 3) keepusing(LopNr Medblandnamn MedBdat)
rename Medblandnamn Medblandnamn_spouse_10

* Switch the IDs back: LopNr is the person, LopNr_spouse the spouse
rename LopNr LopNr_spouse
rename LopNr_i LopNr

* Spouse's citizenship date (read as YYYYMMDD)
tostring MedBdat, replace
gen year_ctz = substr(MedBdat, 1, 4)
gen month_ctz = substr(MedBdat, 5, 2)
gen day_ctz = substr(MedBdat, 7, 2)

destring month_ctz year_ctz day_ctz, replace
gen edate = mdy(month_ctz, day_ctz, year_ctz)

* Spouses that became Swedish citizens in 2010, but after the election.
* Missing counts as larger than any date, so a bare "edate > ..." would also
* keep spouses without a citizenship date; require a valid date in 2010.
keep if edate > mdy(9, 19, 2010) & !missing(edate) & year_ctz == 2010

keep LopNr LopNr_spouse voted_local_spouse_10 F_delselandGrupp_spouse_10 Medblandnamn_spouse_10

drop if voted_local_spouse_10 == .  											// Keep those that were eligible to vote in the local election
save "$usingdata/spouse_2010", replace

* Spouse file for 1994: one row per person (LopNr) with the spouse's local
* turnout, for spouses who became Swedish citizens in 1994 but after the
* election. Citizenship dates come from the 1998 register file.
use "$usingdata/LISA_1994", clear

* Keep persons with a family-status code below 200, excluding code 0
* (treated as spouses in the original code)
keep if FamStF < 200 & FamStF != 0
keep LopNr FamId_94

* Keep families with exactly two such persons
duplicates tag FamId_94, gen(dup)
keep if dup == 1

* Attach the other family member's LopNr. Stored as double: the default float
* type cannot hold integers above 16,777,216 exactly and would corrupt large
* IDs before the merges below.
bysort FamId_94 (LopNr): gen double LopNr_spouse = LopNr[_n+1]
bysort FamId_94 (LopNr): replace LopNr_spouse = LopNr[_n-1] if LopNr_spouse == .
keep LopNr LopNr_spouse

* Temporarily let LopNr be the spouse's ID so the spouse's data can be merged
rename LopNr LopNr_i
rename LopNr_spouse LopNr

* Spouse's local turnout: 1 if voted in the municipal or county election,
* 0 if voted in neither
merge 1:1 LopNr using "$usingdata/Valdelt_1994", nogen keep(1 3)
gen voted_local_spouse_94 = .
replace voted_local_spouse_94 = 1 if inlist(k_94, 2, 4, 5, 6) | inlist(l_94, 2, 4, 5, 6)
replace voted_local_spouse_94 = 0 if k_94 == 1 & l_94 == 1

merge 1:1 LopNr using "$usingdata/Fodelseuppg", nogen keep(1 3) keepusing(LopNr F_delselandGrupp)
rename F_delselandGrupp F_delselandGrupp_spouse_94
merge 1:1 LopNr using "$usingdata/RTB_1998", nogen keep(1 3) keepusing(LopNr Medblandnamn MedBdat)
rename Medblandnamn Medblandnamn_spouse_94

* Switch the IDs back: LopNr is the person, LopNr_spouse the spouse
rename LopNr LopNr_spouse
rename LopNr_i LopNr

* Spouse's citizenship date (read as YYYYMMDD)
tostring MedBdat, replace
gen year_ctz = substr(MedBdat, 1, 4)
gen month_ctz = substr(MedBdat, 5, 2)
gen day_ctz = substr(MedBdat, 7, 2)

destring month_ctz year_ctz day_ctz, replace
gen edate = mdy(month_ctz, day_ctz, year_ctz)

* Spouses that became Swedish citizens in 1994, but after the election.
* The 1998 register also holds citizenship dates from 1995-1998, and missing
* counts as larger than any date, so a bare "edate > ..." would keep later
* years and spouses without a citizenship date; require a valid date in 1994.
keep if edate > mdy(9, 18, 1994) & !missing(edate) & year_ctz == 1994

keep LopNr LopNr_spouse voted_local_spouse_94 F_delselandGrupp_spouse_94 Medblandnamn_spouse_94

drop if voted_local_spouse_94 == .												// Keep those that were eligible to vote in the local election
save "$usingdata/spouse_1994", replace

* Stack the two spouse files and collapse the year-specific variables into
* one set. A combined value is taken from whichever year-specific variable is
* filled in; it stays empty if both are filled in.
use "$usingdata/spouse_2010", clear
append using "$usingdata/spouse_1994"

gen voted_local_spouse = cond(voted_local_spouse_94 == ., voted_local_spouse_10, ///
	cond(voted_local_spouse_10 == ., voted_local_spouse_94, .))

foreach stub in F_delselandGrupp_spouse Medblandnamn_spouse {
	gen `stub' = cond(`stub'_94 == "", `stub'_10, cond(`stub'_10 == "", `stub'_94, ""))
}

* Persons that appear more than once (for example in both years) are dropped
* entirely
duplicates tag LopNr, gen(ndup)
drop if ndup > 0
drop ndup

keep LopNr voted_local_spouse F_delselandGrupp_spouse Medblandnamn_spouse
save "$usingdata/spouse", replace


*********************************************************************************
* Number of individuals from each country: For heterogeneity analysis ***********
*********************************************************************************

clear all

* ---- 1994 ----
* Country-of-origin codes, one row per person
use "$rawdata/RTB/FodelselandIFAUGrupp_2020", clear

* Drop Sweden and the Nordic countries.
* NOTE(review): Sweden is dropped here as group "01", while the individual
* data drops IFAUkod == 0 - confirm the two codings agree.
drop if inlist(Fodelselandgrupp, "01", "26", "27", "28")

* Keep those who are in the population register for 1994
merge 1:m LopNr using "$rawdata/RTB/RTB_1994", keep(match) nogenerate

* Count persons per country group
gen pop = 1
collapse (count) pop, by(Fodelselandgrupp)

* Rename and convert to numeric so the file can be merged on IFAUkod and year_ctz
rename Fodelselandgrupp IFAUkod
gen year_ctz = 1994
destring IFAUkod, replace
drop if IFAUkod == .

* Empirical cumulative distribution of group size across country groups
cumul pop, generate(cum_pop)

save "$usingdata/immpercountry1994.dta", replace


* ---- 2010: same steps ----
clear all

use "$rawdata/RTB/FodelselandIFAUGrupp_2020", clear
drop if inlist(Fodelselandgrupp, "01", "26", "27", "28")

merge 1:m LopNr using "$rawdata/RTB/RTB_2010", keep(match) nogenerate

gen pop = 1
collapse (count) pop, by(Fodelselandgrupp)

rename Fodelselandgrupp IFAUkod
gen year_ctz = 2010
destring IFAUkod, replace
drop if IFAUkod == .

cumul pop, generate(cum_pop)

save "$usingdata/immpercountry2010", replace


* ---- Both years in one file (2010 is in memory; add 1994) ----
append using "$usingdata/immpercountry1994"

save "$usingdata/immpercountry.dta", replace



********************************************************************************
* Putting it all together, final order and save ********************************
********************************************************************************

* Final assembly: start from the individual-level file saved before the
* spouse data management, add spouse turnout and country-group sizes, build
* region dummies and save.
use "$usingdata/temp2.dta", clear

merge 1:1 LopNr using "$usingdata/spouse", nogen keep(1 3)

* Country-group sizes; rows that exist only in the country file are not kept
* (no individuals from 1 country, code 53)
merge m:1 IFAUkod year_ctz using "$usingdata/immpercountry", keep(master match) nogenerate

* Countries of origin: one dummy per category of F_delselandGrupp.
* NOTE(review): the renames assume the sorted categories come out as
* 1 = Africa, 2 = Asia, 3 = EU, 4 = Europe non-EU, 9 = South America - confirm
* against the tabulation, since any change in the categories shifts the numbering.
tab F_delselandGrupp, gen(region)
rename region1 africa
rename region2 asia
rename region3 EU
rename region4 europe_nonEU
rename region9 southam
drop region5 region6 region7 region8
gen europe = (EU == 1 | europe_nonEU == 1)

* Number of immigrants already living in the country: 1 if the country group
* lies above the median of the group-size distribution (cum_pop > 0.5), 0
* otherwise. Left missing when cum_pop is missing; a bare ">=" test would code
* those observations as 1 because missing counts as larger than any number.
gen large_diaspora = (cum_pop > 0.5) if !missing(cum_pop)

*
order LopNr voted_local
compress
save "$usingdata/finaldata/finaldata_immigrants.dta", replace



********************************************************************************
* REFERENDUM DATA FROM 2003 *****************************************************
********************************************************************************



*** Import the 2002 election and 2003 referendum turnout samples

* 2002: keep only turnout in the 2002 elections and save it temporarily
use "$rawdata/VD/Valdelt_91_94_98_02_06_10_14_AV.dta", clear
keep if valar == 2002
save "$usingdata/turnout_2002.dta", replace

* 2003 referendum: keep the ID, the eligibility variable (renamed so that it
* carries an EU suffix) and the turnout variable
use "$rawdata/VD/Valdelt_03_FO.dta", clear
rename rostratt rostratt_EU
keep LopNr rostratt_EU folkrost

* Combine the referendum data with the 2002 turnout data
merge 1:1 LopNr using "$usingdata/turnout_2002"

* Record for which elections turnout data is available: referendum only
* ("2003"), 2002 election only ("2002") or both ("Both")
gen turnout_avail = cond(_merge == 3, "Both", cond(_merge == 1, "2003", "2002"))
drop _merge

* Saved before the immigration data is added, which is used to determine how
* long these individuals had been in Sweden before the elections
save "$usingdata/turnout_2002_2003", replace

* Add the immigration/emigration records; only matched individuals are kept
merge 1:m LopNr using "$rawdata/RTB/Migrationer.dta", keep(match) nogenerate

* Keep only immigration records, that is, individuals who have at some point
* immigrated to Sweden
keep if PostTyp == "Inv"

* Immigration year from the date variable (first four characters)
tostring Datum, replace
gen immig_year = substr(Datum, 1, 4)
destring immig_year, replace

* First immigration year per individual
bysort LopNr: egen first_year = min(immig_year)

* Indicator for having immigrated only once (exactly one immigration record)
bysort LopNr: gen immig_once = (_N == 1)

* Keep only the records from the first immigration year
keep if first_year == immig_year

* Individuals who immigrated more than once during that year still have
* several records; keep the first occurrence
bysort LopNr: gen n = _n
keep if n == 1


* Match with the register data that holds citizenship information
merge 1:m LopNr using "$rawdata/RTB/RTB_2003", keep(match) nogenerate

save "$usingdata/turnout_2002_2003_RTB", replace

use "$usingdata/turnout_2002_2003_RTB", clear

* Keep individuals who 1) have never become citizens, or 2) were not citizens
* at the time of the 2003 referendum (citizenship date after 14 Sep 2003)
keep if MedBdat == 0 | MedBdat > 20030914

* Turnout dummies. Referendum: 1 if folkrost is 1, missing if folkrost is
* missing. Local: 1 if voted in the municipal or county election, missing if
* the municipal code is missing.
gen EUvote = (folkrost == 1) if folkrost != .
gen local_vote = (komrost == 1 | landrost == 1) if komrost != .

* Years between the election/referendum year and the first year of immigration
gen dist_year_local = 2002 - first_year
gen dist_year_eu    = 2003 - first_year

* Origin indicators: another Nordic country, and another EU country (the
* latter includes the Nordic group)
gen nordic = (Landnamn == "Norden utom Sverige")
gen eu_country = (nordic == 1 | Landnamn == "EU28 utom Norden")


* Lastly, drop anyone recorded as eligible to vote in the national election
* (in case immigration status or citizenship has been miscoded)
drop if rostratt == 1

keep LopNr EUvote local_vote dist_year_local dist_year_eu Landnamn immig_once nordic eu_country

* Save the final version of the data
save "$usingdata/finaldata/turnout_2002_2003", replace


********************************************************************************
* Novelty effect ***************************************************************
********************************************************************************


* Additional citizenship cohorts for the novelty analysis: 1991 (read from the
* 1998 register file) and 2006 (read from the 2006 register file)


foreach k in 1998 2006 {
	* Citizenship year to keep from this register file
	local ctzyear = cond(`k' == 2006, `k', 1991)

	use "$usingdata/RTB_`k'", clear

	* Split the citizenship date (read as YYYYMMDD) into year, month and day
	tostring MedBdat, replace
	gen year_ctz  = substr(MedBdat, 1, 4)
	gen month_ctz = substr(MedBdat, 5, 2)
	gen day_ctz   = substr(MedBdat, 7, 2)
	destring month_ctz year_ctz day_ctz, replace

	* Only Swedish citizens have the right to vote in parliamentary elections;
	* keep those who became Swedish citizens in 1991 or 2006
	keep if trim(Medblandnamn) == "Sverige"
	keep if year_ctz == `ctzyear'

	* Stop the run if any LopNr occurs more than once
	duplicates tag LopNr, gen(ndup)
	assert ndup == 0
	drop ndup

	save "$usingdata/temp`k'_rev", replace
}

* Stack the two cohorts; persons that occur more than once after stacking are
* dropped entirely
use "$usingdata/temp1998_rev"
append using "$usingdata/temp2006_rev"
duplicates tag LopNr, gen(ndup)
drop if ndup != 0
drop ndup


* Adding covariates

* Birth data; sample members without a match are kept
merge 1:1 LopNr using "$usingdata/Fodelseuppg.dta", keep(master match) nogenerate

* Age in the citizenship year, from the birth year/month variable (read as YYYYMM)
tostring FodArMan, replace
gen year_born  = substr(FodArMan, 1, 4)
gen month_born = substr(FodArMan, 5, 2)
destring year_born month_born, replace

gen age = year_ctz - year_born

* Drop individuals who are not 18 by election day: those under 18, and those
* who turn 18 in September-December (election day is in September).
drop if age < 18 | (age == 18 & month_born > 8)


* Country of origin (grouped)
merge 1:1 LopNr using "$usingdata/FodelselandIFAUGrupp", keep(master match) nogenerate

* Drop Nordic and Swedish born: Nordic citizens have other rules for voting in
* elections and are hence dropped.
drop if inlist(IFAUkod, 0, 26, 27, 28)


* Adding outcome variables: voter turnout files for 2010 and 1994 (in that order)
foreach yr in 2010 1994 {
	merge 1:1 LopNr using "$usingdata/Valdelt_`yr'", keep(master match) nogenerate
}


* Right to vote in national elections: the smaller of the two year-specific
* codes (min ignores a missing argument), turned into a 0/1 dummy that is
* missing only when both codes are missing
gen Rostratt = min(Rostratt_94, Rostratt_10)
gen DRostratt = (Rostratt == 1) if !missing(Rostratt)
label variable DRostratt "Right. vote, national"


save "$usingdata/temp1_rev.dta", replace


* Generate running variables for novelty RD-analyses

* Citizenship date as one Stata date. mdy() returns missing when any part is
* missing or the date is invalid, so edate and the two running variables
* below can be missing.
gen edate = mdy(month_ctz, day_ctz, year_ctz)

* Treatment: included in the election roll for the national election.
* Running variable = days between the citizenship date and the cut-off date.
* NOTE(review): 17 Aug 2006 is 31 days before 17 Sep 2006, although the cut-off
* is described as 30 days before the election - confirm the intended date.
gen days_treat_enrolled = .
replace days_treat_enrolled = mdy(8,17,2006) - edate if year_ctz == 2006 		// Described as 30 days before the election
replace days_treat_enrolled = mdy(7,1,1991) - edate if year_ctz == 1991			// 76 days before the election

* Treatment: being a citizen at the time of the general election
gen days_treat_citizen = .
replace days_treat_citizen = mdy(9,17,2006) - edate if year_ctz == 2006			// Election day, September 17
replace days_treat_citizen = mdy(9,15,1991) - edate if year_ctz == 1991			// Election day, September 15


* Treatment groups. Stata treats missing as larger than any number, so every
* ">= 0" test is paired with a non-missing check; otherwise people without a
* valid citizenship date would be classified as enrolled (group 3).
gen Tgroup = .
replace Tgroup = 1 if days_treat_citizen < 0									// After the election. Not citizen, not able to vote
replace Tgroup = 2 if days_treat_citizen >= 0 & !missing(days_treat_citizen) & days_treat_enrolled < 0	// Middle group. Citizens without the right to vote.
replace Tgroup = 3 if days_treat_enrolled >= 0 & !missing(days_treat_enrolled)	// Citizens with the right to vote.

* Outcomes. The code pairs the 1991 cohort with the 1994 turnout variables and
* the 2006 cohort with the 2010 turnout variables.
* In Stata & binds tighter than |, so every list of codes is wrapped in
* parentheses (or inlist) before the cohort restriction is applied. Without
* this, a person's outcome could be set from the other cohort's election,
* because the other-year turnout variables are not blanked in this section.
* "r_94 > 3" is paired with a non-missing check because missing counts as
* larger than any number.

* Eligible to vote: code 3 = not eligible; codes 1, 2 and above 3 = eligible
gen eligeble = .
replace eligeble = 0 if r_94 == 3 & year_ctz == 1991
replace eligeble = 1 if (r_94 == 1 | r_94 == 2 | (r_94 > 3 & !missing(r_94))) & year_ctz == 1991
replace eligeble = 0 if r_10 == 3 & year_ctz == 2006
replace eligeble = 1 if inlist(r_10, 1, 2, 4, 5, 6) & year_ctz == 2006


* Voted parliament: codes 2, 4, 5, 6 = voted; code 1 = did not vote
gen voted_r = .
replace voted_r = 1 if inlist(r_94, 2, 4, 5, 6) & year_ctz == 1991
replace voted_r = 0 if r_94 == 1 & year_ctz == 1991
replace voted_r = 1 if inlist(r_10, 2, 4, 5, 6) & year_ctz == 2006
replace voted_r = 0 if r_10 == 1 & year_ctz == 2006


* Voted municipal and county: 1 if voted in either, 0 if voted in neither
gen voted_local = .
replace voted_local = 1 if (inlist(k_94, 2, 4, 5, 6) | inlist(l_94, 2, 4, 5, 6)) & year_ctz == 1991
replace voted_local = 0 if k_94 == 1 & l_94 == 1 & year_ctz == 1991
replace voted_local = 1 if (inlist(k_10, 2, 4, 5, 6) | inlist(l_10, 2, 4, 5, 6)) & year_ctz == 2006
replace voted_local = 0 if k_10 == 1 & l_10 == 1 & year_ctz == 2006

drop l_* k_* r_*

drop if voted_local == .														// We only study those for which we have information on voter turnout.


*
save "$usingdata/noveltyanalysis.dta", replace
*


*****************************************
* Close the log and leave an empty session
capture log close
clear all
	